/*
Copyright 2008-2009 Elöd Egyed-Zsigmond, Cyril Laitang
Copyright 2009-2011 Samuel Gesche

This file is part of IPRI News Analyzer.

IPRI News Analyzer is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

IPRI News Analyzer is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with IPRI News Analyzer.  If not, see <http://www.gnu.org/licenses/>.
*/

package zold.proc.tagging;

import proc.tagging.*;
import data.structures.tagging.*;
import data.structures.thema.ThemaItem;
import proc.tagging.TreeTagger;
import data.structures.tagging.LemmaVector;
import data.base.Database;
import data.base.connectors.ThesaurusDatabase;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Set;
import java.util.Vector;
import proc.text.XMLCleaner;

/**
 * Lemmatizes the name and description of every thesaurus class returned by
 * {@link ThesaurusDatabase#getAllClasses()} using {@link TreeTagger}.
 *
 * <p>NOTE(review): every database write in this class is currently commented
 * out, so the computed lemmas are only stored on the in-memory
 * {@link ThemaItem} instances and never persisted.
 *
 * @author claitang1
 */
public class _ThemaLemmatizer {

    /** Database handle handed to every {@link ThesaurusDatabase} created by this class. */
    private final Database theDB;

    /**
     * Creates a lemmatizer working against the given database.
     *
     * @param db database handle; stored as-is, without validation
     */
    public _ThemaLemmatizer(Database db) {
        theDB = db;
    }

    /**
     * Tags the concept name and concept description of every thesaurus class.
     *
     * <p>For each item, both texts are passed through
     * {@code XMLCleaner.xmlToText} and then {@code TreeTagger.processText};
     * the resulting lemmas are set on the item and handed to
     * {@link #addLemme(int, LemmaVector, LemmaVector)}.
     *
     * <p>A single try/catch surrounds the whole loop: the first exception
     * stops the processing of the remaining items and is reported on standard
     * output; nothing is rethrown.
     */
    public void lemmatizeThema() {
        // Fetch the list of all thematic classes from the database.
        ThesaurusDatabase td = new ThesaurusDatabase(theDB);
        TreeTagger tt = new TreeTagger();
        System.out.println("Lemmatization des thematiques");
        try {
            Set<ThemaItem> items = td.getAllClasses();
            for (ThemaItem item : items) {
                // Run the cleaned-up name and description through the tagger.
                LemmaVector lemmasTitle = tt.processText(XMLCleaner.xmlToText(item.getConceptName()));
                LemmaVector lemmasDesc = tt.processText(XMLCleaner.xmlToText(item.getConceptDesc()));

                // Lemma filtering is disabled:
                //   lemmasTitle.filtre();
                //   lemmasDesc.filtre();

                // Update the lemma fields of the in-memory item.
                item.setTitleLemmas(lemmasTitle.toLemmas());
                item.setDescLemmas(lemmasDesc.toLemmas());

                // Writing the two lemma fields back to the database is disabled:
                //   td.UpdateLemmas(item);

                // Hand the lemmas over for insertion into the lemma table.
                addLemme(item.getConceptID(), lemmasTitle, lemmasDesc);
            }
        } catch (Exception ex) {
            // The message is concatenated rather than passed through
            // String.format: an exception message is arbitrary text and must
            // not be interpreted as a format string (a '%' in it would make
            // the formatter throw from inside this handler).
            System.out.println("Lemme desc insert problem :" + ex.getMessage());
        }
    }

    /**
     * Walks the title lemmas and the description lemmas of one thesaurus class
     * and classifies each one as "title and description", "title only" or
     * "description only".
     *
     * <p>NOTE(review): the {@code InsertLemma} calls that would persist each
     * case are commented out, so this method currently writes nothing. The
     * disabled calls are kept below as comments, in their original shape, so
     * they can be restored once the {@code InsertLemma} signature is settled
     * (only the description-only call passes {@code lex}).
     *
     * @param idThema identifier of the thesaurus class; only referenced by the
     *                disabled insert calls
     * @param title   lemmas extracted from the class name
     * @param desc    lemmas extracted from the class description
     */
    public void addLemme(int idThema, LemmaVector title, LemmaVector desc) {

        // Only needed by the disabled inserts. Kept because the side effects
        // of constructing a ThesaurusDatabase are not visible from this file.
        ThesaurusDatabase td = new ThesaurusDatabase(theDB);
        HashMap<LemmaInfos, Integer> lemmasCountTitle = title.toLemmasCount();
        HashMap<LemmaInfos, Integer> lemmasCountDesc = desc.toLemmasCount();

        // For each title lemma.
        for (LemmaInfos info : title.getLemmas()) {
            // lemma / lex are the arguments of the disabled inserts below.
            String lemma = info.get_lemma();
            String lex = info.get_lex();
            if (lemmasCountDesc.containsKey(info)) {
                // Present in both the title and the description. Disabled:
                //   td.InsertLemma(idThema, lemma, lemmasCountTitle.get(info), lemmasCountDesc.get(info));
            } else {
                // Present in the title only. Disabled:
                //   td.InsertLemma(idThema, lemma, lemmasCountTitle.get(info), 0);
            }
        }

        // For each description lemma that does not also appear in the title.
        for (LemmaInfos info : desc.getLemmas()) {
            String lemma = info.get_lemma();
            String lex = info.get_lex();
            if (!lemmasCountTitle.containsKey(info)) {
                // Present in the description only. Disabled:
                //   td.InsertLemma(idThema, lemma, 0, lemmasCountDesc.get(info), lex);
            }
        }
    }

}
